In [1]:
    
import sdm as sdmlib
import matplotlib.pyplot as plt
import time
from collections import defaultdict, OrderedDict
from IPython.display import clear_output
%matplotlib inline
    
In [43]:
    
class Timer(object):
    """Collect wall-clock timings for named benchmark sections.

    Typical use: ``start(key, name)``, then ``measure_loop()`` after every
    iteration, then ``end()``. Every section is stored in ``stats`` (an
    ``OrderedDict``) under ``key``.

    Once ``end()`` has run, an entry contains ``'name'`` and ``'dt'`` (total
    seconds). If at least one loop was recorded it also contains
    ``'number-of-loops'``, ``'loops-per-second'`` and ``'loops'`` (the
    duration of each iteration, in seconds).
    """

    def __init__(self):
        self.stats = OrderedDict()
        self.current_stats = None

    def start(self, key, name):
        """Begin a section stored under ``key`` with display label ``name``.

        Re-using a key overwrites the entry previously stored under it.
        """
        # Sample the clock once so the stats entry and ``self.t0`` agree.
        now = time.time()
        self.current_stats = {
            'name': name,
            't0_loops': [],
            't0': now,
        }
        self.stats[key] = self.current_stats
        # Kept so existing code reading ``timer.t0`` keeps working.
        self.t0 = now

    def _active(self):
        """Return the section being measured, or raise if there is none."""
        if self.current_stats is None:
            raise RuntimeError('no measurement in progress; call start() first')
        return self.current_stats

    def measure_loop(self):
        """Record that one iteration of the current section just finished."""
        self._active()['t0_loops'].append(time.time())

    def end(self):
        """Finish the current section and replace raw timestamps with statistics.

        Raises:
            RuntimeError: if no section was started.
        """
        d = self._active()
        t0 = d.pop('t0')
        marks = d.pop('t0_loops')
        d['dt'] = time.time() - t0
        if marks:
            d['number-of-loops'] = len(marks)
            # A coarse clock can report zero elapsed time for very short
            # sections; report an infinite rate instead of dividing by zero.
            d['loops-per-second'] = len(marks) / d['dt'] if d['dt'] > 0 else float('inf')
            # Each duration is the gap to the previous mark (the first one is
            # measured from the section start).
            d['loops'] = [t1 - prev for prev, t1 in zip([t0] + marks[:-1], marks)]
        self.current_stats = None
    
In [3]:
    
def run_part1(bits, sample, radius, nlinear=1000, nthread=5000, nopencl=5000):
    """Time the linear, thread and OpenCL scanners on one random address space.

    The address space is created with
    ``sdmlib.AddressSpace.init_random(bits, sample)`` and OpenCL is
    initialised once up front. Every timed iteration draws a fresh random
    bitstring and scans around it with ``radius``; generating the bitstring
    is therefore part of each measured iteration.

    Args:
        bits: forwarded to ``AddressSpace.init_random`` and ``Bitstring.init_random``.
        sample: forwarded to ``AddressSpace.init_random``.
        radius: scan radius passed to every scan call.
        nlinear: iterations of ``scan_linear2``.
        nthread: iterations of ``scan_thread2``.
        nopencl: iterations of ``scan_opencl2``.

    Returns:
        A ``Timer`` whose ``stats`` holds the keys ``'scan_linear'``,
        ``'scan_thread'`` and ``'scan_opencl'``.
    """
    address_space = sdmlib.AddressSpace.init_random(bits, sample)
    address_space.opencl_init()

    # (stats key, label, progress message, scan method, iterations), in run order.
    plan = [
        ('scan_linear', 'Linear scan', 'Running linear scan...', address_space.scan_linear2, nlinear),
        ('scan_thread', 'Thread scan', 'Running thread scan...', address_space.scan_thread2, nthread),
        ('scan_opencl', 'OpenCL scan', 'Running OpenCL scan...', address_space.scan_opencl2, nopencl),
    ]

    timer = Timer()
    for key, name, message, scan, loops in plan:
        # Print before starting the clock so console I/O is never counted in
        # the first iteration of any section.
        print(message)
        timer.start(key, name)
        for _ in range(loops):
            bs = sdmlib.Bitstring.init_random(bits)
            scan(bs, radius)
            timer.measure_loop()
        timer.end()

    return timer
    
In [1]:
    
def results_part1(bits, timer):
    """Report the scan benchmark: Markdown table, LaTeX-style rows and a histogram.

    Reads the ``'scan_linear'``, ``'scan_thread'`` and ``'scan_opencl'``
    entries of ``timer.stats``. The figure is left open (no ``plt.show()``),
    so the caller can still adjust the current axes afterwards.

    Args:
        bits: number of bits, used only in the figure title.
        timer: object whose ``stats`` mapping holds the three entries above,
            each with ``'name'``, ``'number-of-loops'``, ``'dt'``,
            ``'loops-per-second'`` and ``'loops'`` (seconds per iteration).
    """
    from IPython.display import display, Markdown

    # (stats key, legend label) for every scanner, in display order.
    scanners = [
        ('scan_linear', 'Linear scan'),
        ('scan_thread', 'Thread scan'),
        ('scan_opencl', 'OpenCL scan'),
    ]

    v = [['', 'Loops', 'Total time', 'Scans per second', 'Time per scan (ms)']]
    v.append(['---', '---:', '---:', '---:', '---:'])
    for key, _ in scanners:
        stats = timer.stats[key]
        v.append([
            stats['name'],
            stats['number-of-loops'],
            stats['dt'],
            stats['loops-per-second'],
            1000.0 / stats['loops-per-second'],
        ])
    txt = '\n'.join('|'.join(str(x) for x in row) for row in v)
    display(Markdown(txt))

    # The separator is emitted unchanged as `\ \hfill` + newline; only the
    # invalid `\h` escape in the literal was spelled out.
    # NOTE(review): LaTeX table rows normally end with `\\`, and the Markdown
    # alignment row is included in this output too -- confirm this is intended.
    latex = '\\ \\hfill\n'.join(' & '.join(str(x) for x in row) for row in v)
    print(latex)

    plt.figure(figsize=(8, 6), dpi=300)
    for key, label in scanners:
        # Durations are stored in seconds; plot them in milliseconds.
        millis = [1000*x for x in timer.stats[key]['loops']]
        plt.hist(millis, bins='fd', density=True, alpha=0.9, label=label)
    plt.legend()
    plt.title('Scan performance ($n={}$ bits)'.format(bits))
    plt.xlabel('Scan duration (ms)')
    plt.ylabel('Probability')
    
In [5]:
    
# Part 1 (scan benchmark) with bits=1000, sample=1000000, radius=451 and the default loop counts.
timer1000 = run_part1(bits=1000, sample=1000000, radius=451)
    
    
In [2]:
    
# Table, LaTeX-style rows and histogram for the 1000-bit scan benchmark.
results_part1(bits=1000, timer=timer1000)
    
    
In [7]:
    
# Part 1 (scan benchmark) with bits=256, sample=1000000, radius=103 and the default loop counts.
timer256 = run_part1(bits=256, sample=1000000, radius=103)
    
    
In [3]:
    
# Report the 256-bit scan benchmark, then restrict the histogram's x-axis to 2.4-7.5 ms.
results_part1(bits=256, timer=timer256)
plt.xlim(2.4, 7.5);
    
    
In [9]:
    
# Part 1 (scan benchmark) with bits=10000, sample=1000000, radius=4845 and reduced loop counts.
timer10k = run_part1(
    bits=10000,
    sample=1000000,
    radius=4845,
    nlinear=100,
    nthread=500,
    nopencl=1000,
)
    
    
In [14]:
    
# Report the 10000-bit scan benchmark, then restrict the histogram's x-axis to 7-150 ms.
results_part1(bits=10000, timer=timer10k)
plt.xlim(7, 150);
    
    
    
In [18]:
    
def read_write(key, name, timer, bits, sample, radius, scanner_type, n):
    """Time ``n`` writes followed by ``n`` reads on a freshly built SDM.

    A random address space and a zeroed counter are created for ``bits`` and
    ``sample``, then wrapped in ``sdmlib.SDM`` with ``radius`` and
    ``scanner_type``. Every iteration draws a new random bitstring; writes
    store the bitstring at its own address. Generating the bitstring is part
    of each measured iteration.

    Results are recorded on ``timer`` under ``'write_<key>'`` (label
    ``'<name> write'``) and ``'read_<key>'`` (label ``'<name> single read'``).

    Returns:
        The same ``timer`` object that was passed in.
    """
    memory = sdmlib.SDM(
        sdmlib.AddressSpace.init_random(bits, sample),
        sdmlib.Counter.init_zero(bits, sample),
        radius,
        scanner_type,
    )

    # Write phase.
    write_key = 'write_{}'.format(key)
    write_label = '{} write'.format(name)
    timer.start(write_key, write_label)
    for _ in range(n):
        word = sdmlib.Bitstring.init_random(bits)
        memory.write(word, word)
        timer.measure_loop()
    timer.end()

    # Read phase.
    read_key = 'read_{}'.format(key)
    read_label = '{} single read'.format(name)
    timer.start(read_key, read_label)
    for _ in range(n):
        probe = sdmlib.Bitstring.init_random(bits)
        memory.read(probe)
        timer.measure_loop()
    timer.end()

    return timer
    
In [23]:
    
def run_part2(bits, sample, radius, nthread=1000, nopencl=1000):
    """Time SDM writes and reads with the thread scanner, then the OpenCL scanner.

    Each scanner is benchmarked by ``read_write`` on its own freshly built
    memory; both record their results on one shared ``Timer``.

    Args:
        bits, sample, radius: forwarded unchanged to ``read_write``.
        nthread: number of writes and of reads with ``sdmlib.SDM_SCANNER_THREAD``.
        nopencl: number of writes and of reads with ``sdmlib.SDM_SCANNER_OPENCL``.

    Returns:
        The ``Timer`` holding ``'write_thread'``, ``'read_thread'``,
        ``'write_opencl'`` and ``'read_opencl'``.
    """
    timer = Timer()
    # print(...) with a single argument behaves the same on Python 2 and 3,
    # and matches the other benchmark functions in this notebook.
    print('Running thread...')
    read_write('thread', 'Thread', timer, bits, sample, radius, sdmlib.SDM_SCANNER_THREAD, n=nthread)
    print('Running OpenCL...')
    read_write('opencl', 'OpenCL', timer, bits, sample, radius, sdmlib.SDM_SCANNER_OPENCL, n=nopencl)
    return timer
    
In [65]:
    
def results_part2(bits, timer, hist_xlim=None):
    """Report the read/write benchmark: a Markdown table and two histograms.

    The table has one row per entry of ``timer.stats``, in its iteration
    order. The first figure compares ``'write_thread'`` with
    ``'write_opencl'`` and is shown immediately; the second compares
    ``'read_thread'`` with ``'read_opencl'`` and is left open.

    Args:
        bits: number of bits, used only in the figure titles.
        timer: object whose ``stats`` mapping holds the benchmark entries.
        hist_xlim: optional ``(left, right)`` x-axis limits applied to both
            histograms; falsy values leave the automatic limits.
    """
    from IPython.display import display, Markdown

    header = ['', 'Loops', 'Total time', 'Operation per second', 'Time per operation (ms)']
    alignment = ['---', '---:', '---:', '---:', '---:']
    body = [
        [
            entry['name'],
            entry['number-of-loops'],
            entry['dt'],
            entry['loops-per-second'],
            1000.0 / entry['loops-per-second'],
        ]
        for entry in timer.stats.values()
    ]
    table_lines = ['|'.join(map(str, row)) for row in [header, alignment] + body]
    display(Markdown('\n'.join(table_lines)))

    def draw(title, stat_keys):
        # One figure with an overlaid histogram per entry; durations go from seconds to ms.
        plt.figure(figsize=(8, 6), dpi=300)
        for stat_key in stat_keys:
            entry = timer.stats[stat_key]
            millis = [1000*x for x in entry['loops']]
            plt.hist(millis, bins='fd', density=True, alpha=0.9, label=entry['name'])
        plt.legend()
        plt.title(title)
        plt.xlabel('Scan duration (ms)')
        plt.ylabel('Probability')
        if hist_xlim:
            plt.xlim(*hist_xlim)

    draw('Write performance ($n={}$ bits)'.format(bits), ['write_thread', 'write_opencl'])
    plt.show()

    draw('Read performance ($n={}$ bits)'.format(bits), ['read_thread', 'read_opencl'])
    
In [40]:
    
# Part 2 (read/write benchmark) with bits=1000, sample=1000000, radius=451, 1000 operations per scanner.
p2timer1000 = run_part2(bits=1000, sample=1000000, radius=451, nthread=1000, nopencl=1000)
    
    
In [67]:
    
# Table and write/read histograms for the 1000-bit run, x-axis limited to 0-23 ms.
results_part2(bits=1000, timer=p2timer1000, hist_xlim=(0, 23))
    
    
    
    
In [53]:
    
# Part 2 (read/write benchmark) with bits=256, sample=1000000, radius=103, 2000 operations per scanner.
p2timer256 = run_part2(bits=256, sample=1000000, radius=103, nthread=2000, nopencl=2000)
    
    
In [61]:
    
# Table and write/read histograms for the 256-bit run, x-axis limited to 0-8 ms.
results_part2(bits=256, timer=p2timer256, hist_xlim=(0, 8))
    
    
    
    
In [ ]:
    
#p2timer10k = run_part2(10000, 1000000, 4845, nthread=500, nopencl=1000)
    
    
In [ ]:
    
#results_part2(10000, p2timer10k, hist_xlim=(0, 8))
    
In [57]:
    
def run_part3(bits, sample, radius, n=1000, verify=False):
    """Time ``scan_opencl2`` once per OpenCL kernel listed in ``sdmlib.OPENCL_KERNEL_NAMES``.

    A single random bitstring is scanned repeatedly, so every kernel does the
    same work. One scan is run before any timing starts and its result is
    kept as the reference.

    Args:
        bits: forwarded to ``AddressSpace.init_random`` and ``Bitstring.init_random``.
        sample: forwarded to ``AddressSpace.init_random``.
        radius: scan radius passed to every scan call.
        n: timed scans per kernel.
        verify: when true, run one extra untimed scan per kernel and require
            its result, compared as a set, to equal the reference. Off by
            default, so default runs behave exactly as before.

    Returns:
        A ``Timer`` with one entry per kernel, keyed and labelled by kernel name.

    Raises:
        ValueError: if ``verify`` is true and a kernel's result differs from
            the reference.
    """
    address_space = sdmlib.AddressSpace.init_random(bits, sample)
    address_space.opencl_init()
    #address_space.opencl_opts.verbose = 1
    bs = sdmlib.Bitstring.init_random(bits)
    # Reference result from the kernel active after opencl_init(); previously
    # this was computed but never used.
    expected = set(address_space.scan_opencl2(bs, radius))

    timer = Timer()
    for kernel in sdmlib.OPENCL_KERNEL_NAMES:
        address_space.set_opencl_kernel(kernel)

        if verify:
            # Checked outside the timed section so it cannot skew the numbers.
            if set(address_space.scan_opencl2(bs, radius)) != expected:
                raise ValueError(
                    'kernel {} returned a different scan result than the reference'.format(kernel)
                )

        print('Running kernel {}...'.format(kernel))
        timer.start(kernel, kernel)
        for _ in range(n):
            address_space.scan_opencl2(bs, radius)
            timer.measure_loop()
        timer.end()
    return timer
    
In [70]:
    
def results_part3(bits, timer, hist_xlim=None):
    """Report the kernel benchmark: a Markdown table and one overlaid histogram.

    Both the table and the figure contain one item per entry of
    ``timer.stats``, in its iteration order. The figure is shown before
    returning.

    Args:
        bits: number of bits, used only in the figure title.
        timer: object whose ``stats`` mapping holds the benchmark entries.
        hist_xlim: optional ``(left, right)`` x-axis limits for the
            histogram; falsy values leave the automatic limits.
    """
    from IPython.display import display, Markdown

    entries = [timer.stats[name] for name in timer.stats.keys()]

    header = ['', 'Loops', 'Total time', 'Scans per second', 'Time per scan (ms)']
    alignment = [':---', '---:', '---:', '---:', '---:']
    body = [
        [
            entry['name'],
            entry['number-of-loops'],
            entry['dt'],
            entry['loops-per-second'],
            1000.0 / entry['loops-per-second'],
        ]
        for entry in entries
    ]
    table_lines = ['|'.join(map(str, row)) for row in [header, alignment] + body]
    display(Markdown('\n'.join(table_lines)))

    plt.figure(figsize=(8, 6), dpi=300)
    for entry in entries:
        # Durations are stored in seconds; plot them in milliseconds.
        millis = [1000*x for x in entry['loops']]
        plt.hist(millis, bins='fd', density=True, alpha=0.75, label=entry['name'])
    plt.legend()
    plt.title('Kernels performance ($n={}$ bits)'.format(bits))
    plt.xlabel('Scan duration (ms)')
    plt.ylabel('Probability')
    if hist_xlim:
        plt.xlim(*hist_xlim)
    plt.show()
    
In [58]:
    
# Part 3 (per-kernel benchmark) with bits=1000, sample=1000000, radius=451, 3000 scans per kernel.
p3timer1000 = run_part3(bits=1000, sample=1000000, radius=451, n=3000)
    
    
In [79]:
    
# Table and histogram for the 1000-bit kernel benchmark, x-axis limited to 2-7 ms.
results_part3(bits=1000, timer=p3timer1000, hist_xlim=(2, 7))
    
    
    
In [72]:
    
# Part 3 (per-kernel benchmark) with bits=256, sample=1000000, radius=103, 3000 scans per kernel.
p3timer256 = run_part3(bits=256, sample=1000000, radius=103, n=3000)
    
    
In [75]:
    
# Table and histogram for the 256-bit kernel benchmark, x-axis limited to 2-7 ms.
results_part3(bits=256, timer=p3timer256, hist_xlim=(2, 7))
    
    
    
In [76]:
    
# Part 3 (per-kernel benchmark) with bits=10000, sample=1000000, radius=4845, 500 scans per kernel.
p3timer10k = run_part3(bits=10000, sample=1000000, radius=4845, n=500)
    
    
In [82]:
    
# Table and histogram for the 10000-bit kernel benchmark, wide view: x-axis 8-70 ms.
results_part3(bits=10000, timer=p3timer10k, hist_xlim=(8, 70))
    
    
    
In [84]:
    
# Same 10000-bit kernel results again, zoomed in: x-axis 10-15 ms.
results_part3(bits=10000, timer=p3timer10k, hist_xlim=(10, 15))
    
    
    
In [ ]: